#coding:utf-8
#! C:\Developer\python36\python3.exe

import lxml
import  lxml.etree

def getDatas() -> None:
    """Extract question/answer entries from a saved HTML page into a text file.

    Reads the complete page markup from ``html.txt`` (UTF-8), parses it with
    ``lxml.etree.HTML`` and visits every ``div`` matched beneath the
    ``TopstoryMain`` container. For each matched ``div`` that yields answer
    text, the following is written to ``zhihu.txt`` (UTF-8, overwritten on
    every run):

    * the question title, if one was found;
    * the answerer's name, if one was found;
    * every answer text fragment, one per line;
    * the publish-time tooltip, if one was found;
    * five newlines as a separator between entries.

    Titles and answer fragments are also echoed to stdout.

    Raises:
        OSError: if ``html.txt`` cannot be read or ``zhihu.txt`` cannot be
            opened for writing.
    """
    # Read the complete HTML markup saved locally. The context manager
    # guarantees the handle is closed even if reading or decoding fails.
    with open('html.txt', 'r', encoding='utf-8') as htmlFile:
        html = htmlFile.read()

    # The output file is likewise managed by ``with`` so that buffered data is
    # flushed and the handle released even when parsing or an XPath query
    # raises part-way through.
    with open('zhihu.txt', 'w', encoding='utf-8') as dataFile:
        myTree = lxml.etree.HTML(html)
        # Candidate divs, one per question entry.
        # NOTE(review): the descendant axis (``//div``) matches nested divs at
        # every depth; the relative paths below decide which ones count.
        divList = myTree.xpath('//div[@class="TopstoryMain"]/div//div')
        for div in divList:
            # Question title.
            title = div.xpath('.//a[@data-za-detail-view-element_name="Title"]/text()')
            # Name of the answerer.
            man = div.xpath('./div[1]/div[2]/div/div/div/div/div/span/div/div/a/text()')
            # Text fragments making up the answer body.
            answerList = div.xpath('./div[1]/div[2]/div[2]/div[2]/span//text()')
            # Publish time, taken from the tooltip attribute.
            times = div.xpath('./div[1]/div[2]/div[2]/div[3]/div/a/span/@data-tooltip')

            # A div without answer text is not an entry; skip it entirely.
            if not answerList:
                continue

            # Write the collected pieces to the text file in a fixed layout.
            if title:
                dataFile.write(title[0] + '\n')
                print(title[0])

            if man:
                dataFile.write('答主：' + man[0] + '\n')

            for p in answerList:
                dataFile.write(p + '\n')
                print(p)

            if times:
                dataFile.write(times[0])

            # Blank lines separate consecutive entries in the output.
            dataFile.write('\n\n\n\n\n')
            print('\n')
# getDatas()